Auto = read.csv("Auto.csv", header=T, na.strings="?")
Auto = na.omit(Auto)
summary(Auto)
## mpg cylinders displacement horsepower
## Min. : 9.00 Min. :3.000 Min. : 68.0 Min. : 46.0
## 1st Qu.:17.00 1st Qu.:4.000 1st Qu.:105.0 1st Qu.: 75.0
## Median :22.75 Median :4.000 Median :151.0 Median : 93.5
## Mean :23.45 Mean :5.472 Mean :194.4 Mean :104.5
## 3rd Qu.:29.00 3rd Qu.:8.000 3rd Qu.:275.8 3rd Qu.:126.0
## Max. :46.60 Max. :8.000 Max. :455.0 Max. :230.0
##
## weight acceleration year origin
## Min. :1613 Min. : 8.00 Min. :70.00 Min. :1.000
## 1st Qu.:2225 1st Qu.:13.78 1st Qu.:73.00 1st Qu.:1.000
## Median :2804 Median :15.50 Median :76.00 Median :1.000
## Mean :2978 Mean :15.54 Mean :75.98 Mean :1.577
## 3rd Qu.:3615 3rd Qu.:17.02 3rd Qu.:79.00 3rd Qu.:2.000
## Max. :5140 Max. :24.80 Max. :82.00 Max. :3.000
##
## name
## amc matador : 5
## ford pinto : 5
## toyota corolla : 5
## amc gremlin : 4
## amc hornet : 4
## chevrolet chevette: 4
## (Other) :365
attach(Auto)
Auto.lm.fit = lm(mpg~horsepower, data=Auto)
summary(Auto.lm.fit)
##
## Call:
## lm(formula = mpg ~ horsepower, data = Auto)
##
## Residuals:
## Min 1Q Median 3Q Max
## -13.5710 -3.2592 -0.3435 2.7630 16.9240
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 39.935861 0.717499 55.66 <2e-16 ***
## horsepower -0.157845 0.006446 -24.49 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.906 on 390 degrees of freedom
## Multiple R-squared: 0.6059, Adjusted R-squared: 0.6049
## F-statistic: 599.7 on 1 and 390 DF, p-value: < 2.2e-16
The p-value corresponding to the F-statistic is nearly 0; therefore, there is a relationship between horsepower and mpg.
The RSE is 4.906 and the mean value of the response is 23.44592, indicating a percentage error of roughly 21%. The R-squared is 0.6059, meaning that 60.59% of the variance in mpg is explained by horsepower.
mean(mpg, na.rm=T)
## [1] 23.44592
4.906/mean(mpg, na.rm=T)*100.0
## [1] 20.92475
summary(Auto.lm.fit)$r.sq
## [1] 0.6059483
The coefficient of horsepower is -0.158 < 0, indicating that the relationship is negative.
predict(Auto.lm.fit, data.frame(horsepower=c(98)), interval="confidence")
## fit lwr upr
## 1 24.46708 23.97308 24.96108
predict(Auto.lm.fit, data.frame(horsepower=c(98)), interval="prediction")
## fit lwr upr
## 1 24.46708 14.8094 34.12476
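The prediction interval is wider than the confidence interval because it accounts for the irreducible error in an individual response in addition to the uncertainty in the estimated mean response.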
plot(horsepower, mpg, xlab="horsepower", ylab="mpg")
abline(Auto.lm.fit, lwd=2, col="red")
From the residual plot, we can see evidence of non-linearity in the relationship between the predictor and the response.
par(mfrow=c(2,2))
plot(Auto.lm.fit)
pairs(Auto)
cor(Auto[-9]) # drop the 9th column, name, which is qualitative
## mpg cylinders displacement horsepower weight
## mpg 1.0000000 -0.7776175 -0.8051269 -0.7784268 -0.8322442
## cylinders -0.7776175 1.0000000 0.9508233 0.8429834 0.8975273
## displacement -0.8051269 0.9508233 1.0000000 0.8972570 0.9329944
## horsepower -0.7784268 0.8429834 0.8972570 1.0000000 0.8645377
## weight -0.8322442 0.8975273 0.9329944 0.8645377 1.0000000
## acceleration 0.4233285 -0.5046834 -0.5438005 -0.6891955 -0.4168392
## year 0.5805410 -0.3456474 -0.3698552 -0.4163615 -0.3091199
## origin 0.5652088 -0.5689316 -0.6145351 -0.4551715 -0.5850054
## acceleration year origin
## mpg 0.4233285 0.5805410 0.5652088
## cylinders -0.5046834 -0.3456474 -0.5689316
## displacement -0.5438005 -0.3698552 -0.6145351
## horsepower -0.6891955 -0.4163615 -0.4551715
## weight -0.4168392 -0.3091199 -0.5850054
## acceleration 1.0000000 0.2903161 0.2127458
## year 0.2903161 1.0000000 0.1815277
## origin 0.2127458 0.1815277 1.0000000
Auto.lm.fit2 = lm(mpg~.-name, data=Auto)
summary(Auto.lm.fit2)
##
## Call:
## lm(formula = mpg ~ . - name, data = Auto)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.5903 -2.1565 -0.1169 1.8690 13.0604
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -17.218435 4.644294 -3.707 0.00024 ***
## cylinders -0.493376 0.323282 -1.526 0.12780
## displacement 0.019896 0.007515 2.647 0.00844 **
## horsepower -0.016951 0.013787 -1.230 0.21963
## weight -0.006474 0.000652 -9.929 < 2e-16 ***
## acceleration 0.080576 0.098845 0.815 0.41548
## year 0.750773 0.050973 14.729 < 2e-16 ***
## origin 1.426141 0.278136 5.127 4.67e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.328 on 384 degrees of freedom
## Multiple R-squared: 0.8215, Adjusted R-squared: 0.8182
## F-statistic: 252.4 on 7 and 384 DF, p-value: < 2.2e-16
The p-value corresponding to the F-statistic is nearly 0; therefore, there is a relationship between the predictors and the response.
From the individual p-values, we can see that displacement, weight, year and origin have a statistically significant relationship to the response.
The coefficient for year (about 0.75) suggests a positive relationship to mpg: holding the other predictors fixed, mpg increases by roughly 0.75 per model year.
From the residual plot, there is a discernible curved pattern, indicating that the linear fit may be inadequate. From the leverage plot, we can see some points with leverage well above the average (p+1)/n computed below. There are possible outliers, as some points have studentized residuals greater than 3 (see the numeric check after the diagnostic plots).
(7+1)/nrow(Auto) # average leverage (p+1)/n, with p = 7 predictors (name excluded)
## [1] 0.02040816
par(mfrow=c(2,2))
plot(Auto.lm.fit2)
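The outlier and leverage claims can also be checked numerically; a quick sketch using the usual rule-of-thumb cutoffs (studentized residuals beyond 3, leverage above twice the average):
sum(abs(rstudent(Auto.lm.fit2)) > 3)              # count of possible outliers
sum(hatvalues(Auto.lm.fit2) > 2*(7+1)/nrow(Auto)) # count of high-leverage points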
From the correlation matrix, I chose the two most highly correlated pairs of predictors to create interaction effects. From the individual p-values, we find that the interaction between displacement and weight is statistically significant, while the interaction between cylinders and displacement is not.
Auto.lm.fit3 = lm(mpg~cylinders*displacement+displacement*weight)
summary(Auto.lm.fit3)
##
## Call:
## lm(formula = mpg ~ cylinders * displacement + displacement *
## weight)
##
## Residuals:
## Min 1Q Median 3Q Max
## -13.2934 -2.5184 -0.3476 1.8399 17.7723
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.262e+01 2.237e+00 23.519 < 2e-16 ***
## cylinders 7.606e-01 7.669e-01 0.992 0.322
## displacement -7.351e-02 1.669e-02 -4.403 1.38e-05 ***
## weight -9.888e-03 1.329e-03 -7.438 6.69e-13 ***
## cylinders:displacement -2.986e-03 3.426e-03 -0.872 0.384
## displacement:weight 2.128e-05 5.002e-06 4.254 2.64e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.103 on 386 degrees of freedom
## Multiple R-squared: 0.7272, Adjusted R-squared: 0.7237
## F-statistic: 205.8 on 5 and 386 DF, p-value: < 2.2e-16
Transforming the response also helps: fitting sqrt(mpg) yields a better model fit, with a higher R-squared, and the pattern in the residual plot is less pronounced.
Auto.lm.fit4 = lm(sqrt(mpg)~.-name, data=Auto)
summary(Auto.lm.fit4)
##
## Call:
## lm(formula = sqrt(mpg) ~ . - name, data = Auto)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.98891 -0.18946 0.00505 0.16947 1.02581
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.075e+00 4.290e-01 2.506 0.0126 *
## cylinders -5.942e-02 2.986e-02 -1.990 0.0474 *
## displacement 1.752e-03 6.942e-04 2.524 0.0120 *
## horsepower -2.512e-03 1.274e-03 -1.972 0.0493 *
## weight -6.367e-04 6.024e-05 -10.570 < 2e-16 ***
## acceleration 2.738e-03 9.131e-03 0.300 0.7644
## year 7.381e-02 4.709e-03 15.675 < 2e-16 ***
## origin 1.217e-01 2.569e-02 4.735 3.09e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3074 on 384 degrees of freedom
## Multiple R-squared: 0.8561, Adjusted R-squared: 0.8535
## F-statistic: 326.3 on 7 and 384 DF, p-value: < 2.2e-16
par(mfrow=c(2,2))
plot(Auto.lm.fit4)
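A log transformation can be tried the same way for comparison (a sketch; Auto.lm.fit5 is a name introduced here):
Auto.lm.fit5 = lm(log(mpg)~.-name, data=Auto)
summary(Auto.lm.fit5)$r.sq # compare with the 0.8561 of the sqrt fit above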
library(ISLR)
##
## Attaching package: 'ISLR'
## The following object is masked _by_ '.GlobalEnv':
##
## Auto
attach(Carseats)
Car.lm.fit = lm(Sales~Price+Urban+US, data=Carseats)
summary(Car.lm.fit)
##
## Call:
## lm(formula = Sales ~ Price + Urban + US, data = Carseats)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.9206 -1.6220 -0.0564 1.5786 7.0581
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 13.043469 0.651012 20.036 < 2e-16 ***
## Price -0.054459 0.005242 -10.389 < 2e-16 ***
## UrbanYes -0.021916 0.271650 -0.081 0.936
## USYes 1.200573 0.259042 4.635 4.86e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.472 on 396 degrees of freedom
## Multiple R-squared: 0.2393, Adjusted R-squared: 0.2335
## F-statistic: 41.52 on 3 and 396 DF, p-value: < 2.2e-16
Price: the p-value, which is close to 0, indicates a statistically significant relationship between price and sales. If price increases by $1 while the other variables are held fixed, sales decrease by roughly 54 units (Sales is measured in thousands of units).
Urban: the p-value is very large, indicating no statistically significant relationship between whether the store is urban and its sales.
US: the p-value, which is close to 0, indicates a statistically significant relationship between whether the store is in the US and its sales. If the store is in the US, sales increase by roughly 1,201 units.
Sales = 13.043469 - 0.054459 x Price - 0.021916 x UrbanYes + 1.200573 x USYes, where UrbanYes and USYes equal 1 if the store is urban / in the US and 0 otherwise.
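R encodes the qualitative predictors as 0/1 dummy variables; the coding behind UrbanYes and USYes in the equation above can be inspected with contrasts():
contrasts(Urban) # Yes = 1, No = 0
contrasts(US)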
Based on the p-values of the individual t-statistics, we can reject the null hypothesis H0: beta_j = 0 for Price and US, but not for Urban.
Car.lm.fit2 = lm(Sales~Price+US, data=Carseats)
summary(Car.lm.fit2)
##
## Call:
## lm(formula = Sales ~ Price + US, data = Carseats)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.9269 -1.6286 -0.0574 1.5766 7.0515
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 13.03079 0.63098 20.652 < 2e-16 ***
## Price -0.05448 0.00523 -10.416 < 2e-16 ***
## USYes 1.19964 0.25846 4.641 4.71e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.469 on 397 degrees of freedom
## Multiple R-squared: 0.2393, Adjusted R-squared: 0.2354
## F-statistic: 62.43 on 2 and 397 DF, p-value: < 2.2e-16
As the two models have very similar RSE and R-squared values, they fit the data similarly; the model in (e) is slightly better (slightly lower RSE and higher adjusted R-squared).
confint(Car.lm.fit2, level=0.95)
## 2.5 % 97.5 %
## (Intercept) 11.79032020 14.27126531
## Price -0.06475984 -0.04419543
## USYes 0.69151957 1.70776632
From the studentized residuals plot, we can see that all studentized residuals are between -3 and 3; therefore there are no obvious outliers. From the leverage plot, we can see that there are some points with high leverage (leverage statistic much greater than the average (p+1)/n = 0.0075).
plot(predict(Car.lm.fit2), rstudent(Car.lm.fit2))
(2+1)/nrow(Carseats) # mean leverage statistic
## [1] 0.0075
plot(hatvalues(Car.lm.fit2))
par(mfrow=c(2,2))
plot(Car.lm.fit2)
The coefficient estimate is 1.9939 with standard error 0.1065; the t-statistic is 18.73 and the p-value is close to 0, indicating a positive relationship between x and y, so the null hypothesis H0: beta = 0 is rejected.
set.seed(1)
x=rnorm(100)
y=2*x + rnorm(100)
lm.fit = lm(y~x+0)
summary(lm.fit)
##
## Call:
## lm(formula = y ~ x + 0)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.9154 -0.6472 -0.1771 0.5056 2.3109
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## x 1.9939 0.1065 18.73 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9586 on 99 degrees of freedom
## Multiple R-squared: 0.7798, Adjusted R-squared: 0.7776
## F-statistic: 350.7 on 1 and 99 DF, p-value: < 2.2e-16
The coefficient estimate is 0.39111 with standard error 0.02089; the t-statistic is 18.73 and the p-value is close to 0, indicating a positive relationship between y and x, so the null hypothesis is again rejected.
lm.fit2 = lm(x~y+0)
summary(lm.fit2)
##
## Call:
## lm(formula = x ~ y + 0)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.8699 -0.2368 0.1030 0.2858 0.8938
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## y 0.39111 0.02089 18.73 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4246 on 99 degrees of freedom
## Multiple R-squared: 0.7798, Adjusted R-squared: 0.7776
## F-statistic: 350.7 on 1 and 99 DF, p-value: < 2.2e-16
The t-statistic and p-value are identical in the two regressions. The product of the slope estimates in (a) and (b) is 1.9939 x 0.39111 ~ 0.7798, which equals the shared R-squared; it would be 1 only if x and y were perfectly correlated.
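This can be verified directly from the two fits above:
coef(lm.fit)["x"] * coef(lm.fit2)["y"] # ~ 0.7798, the shared R-squared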
Evaluating t = sqrt(n-1)*sum(x*y)/sqrt(sum(x^2)*sum(y^2)-(sum(x*y))^2) numerically gives 18.73, the same t-statistic as obtained in (a) and (b).
sqrt(length(x)-1)*sum(x*y)/(sqrt(sum(x^2)*sum(y^2)-(sum(x*y))^2))
## [1] 18.72593
If we swap x and y, the formula in (d) is unchanged because it is symmetric in x and y, so we obtain the same t-statistic.
Both t-statistics are 18.556.
lm.fit3 = lm(y~x)
summary(lm.fit3)
##
## Call:
## lm(formula = y ~ x)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.8768 -0.6138 -0.1395 0.5394 2.3462
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.03769 0.09699 -0.389 0.698
## x 1.99894 0.10773 18.556 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9628 on 98 degrees of freedom
## Multiple R-squared: 0.7784, Adjusted R-squared: 0.7762
## F-statistic: 344.3 on 1 and 98 DF, p-value: < 2.2e-16
lm.fit4 = lm(x~y)
summary(lm.fit4)
##
## Call:
## lm(formula = x ~ y)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.90848 -0.28101 0.06274 0.24570 0.85736
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.03880 0.04266 0.91 0.365
## y 0.38942 0.02099 18.56 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4249 on 98 degrees of freedom
## Multiple R-squared: 0.7784, Adjusted R-squared: 0.7762
## F-statistic: 344.3 on 1 and 98 DF, p-value: < 2.2e-16
The coefficient estimates are the same when the sum of squared x values equals the sum of squared y values, since the slope is sum(x*y)/sum(x^2) when regressing y onto x and sum(x*y)/sum(y^2) when regressing x onto y.
Here the sums of squares differ, so the coefficient of X is 1.9939 while the coefficient of Y is 0.39111.
set.seed(1)
X = rnorm(100)
Y = 2*X + rnorm(100)
summary(lm(Y~X+0))
##
## Call:
## lm(formula = Y ~ X + 0)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.9154 -0.6472 -0.1771 0.5056 2.3109
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## X 1.9939 0.1065 18.73 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9586 on 99 degrees of freedom
## Multiple R-squared: 0.7798, Adjusted R-squared: 0.7776
## F-statistic: 350.7 on 1 and 99 DF, p-value: < 2.2e-16
summary(lm(X~Y+0))
##
## Call:
## lm(formula = X ~ Y + 0)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.8699 -0.2368 0.1030 0.2858 0.8938
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## Y 0.39111 0.02089 18.73 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4246 on 99 degrees of freedom
## Multiple R-squared: 0.7798, Adjusted R-squared: 0.7776
## F-statistic: 350.7 on 1 and 99 DF, p-value: < 2.2e-16
Here Y is a sign-flipped permutation of X, so sum(X^2) = sum(Y^2) and the coefficients of both regressions are the same, -0.02148.
set.seed(1)
X = rnorm(100)
Y = -sample(X, 100)
summary(lm(Y~X+0))
##
## Call:
## lm(formula = Y ~ X + 0)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.3926 -0.6877 -0.1027 0.5124 2.2315
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## X -0.02148 0.10048 -0.214 0.831
##
## Residual standard error: 0.9046 on 99 degrees of freedom
## Multiple R-squared: 0.0004614, Adjusted R-squared: -0.009635
## F-statistic: 0.0457 on 1 and 99 DF, p-value: 0.8312
summary(lm(X~Y+0))
##
## Call:
## lm(formula = X ~ Y + 0)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.2400 -0.5154 0.1213 0.6788 2.3959
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## Y -0.02148 0.10048 -0.214 0.831
##
## Residual standard error: 0.9046 on 99 degrees of freedom
## Multiple R-squared: 0.0004614, Adjusted R-squared: -0.009635
## F-statistic: 0.0457 on 1 and 99 DF, p-value: 0.8312
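As a check on the condition from (e): Y is a sign-flipped permutation of X, so the two sums of squares agree (up to floating-point summation order), which is why the slopes match.
c(sum(X^2), sum(Y^2))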
set.seed(1)
x = rnorm(100)
eps = rnorm(100, mean=0, sd=0.25)
The length of y is 100; beta0 = -1 and beta1 = 0.5.
y = -1+0.5*x+eps
length(y)
## [1] 100
x and y appear to have a linear relationship.
plot(x, y)
The estimates are beta0 = -1.00942 and beta1 = 0.49973, very close to the true values. The p-value corresponding to the F-statistic indicates that there is a relationship between x and y.
lm.fit = lm(y~x)
summary(lm.fit)
##
## Call:
## lm(formula = y ~ x)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.46921 -0.15344 -0.03487 0.13485 0.58654
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.00942 0.02425 -41.63 <2e-16 ***
## x 0.49973 0.02693 18.56 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2407 on 98 degrees of freedom
## Multiple R-squared: 0.7784, Adjusted R-squared: 0.7762
## F-statistic: 344.3 on 1 and 98 DF, p-value: < 2.2e-16
plot(x, y)
abline(lm.fit, col="red")
abline(-1, 0.5, col="yellow")
legend("topleft", legend=c("model fit","population"), col=c("red","yellow"), lwd=3)
The model fit improves a little: the RSE decreases slightly and the R-squared increases slightly. However, the p-value for the x^2 term (0.164) indicates that there is no statistically significant relationship between x^2 and y.
lm.fit2 = lm(y~x+I(x^2))
summary(lm.fit2)
##
## Call:
## lm(formula = y ~ x + I(x^2))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.4913 -0.1563 -0.0322 0.1451 0.5675
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.98582 0.02941 -33.516 <2e-16 ***
## x 0.50429 0.02700 18.680 <2e-16 ***
## I(x^2) -0.02973 0.02119 -1.403 0.164
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2395 on 97 degrees of freedom
## Multiple R-squared: 0.7828, Adjusted R-squared: 0.7784
## F-statistic: 174.8 on 2 and 97 DF, p-value: < 2.2e-16
The three confidence intervals should all be centered around roughly the same estimates; the less noisy data should give narrower confidence intervals, and the noisier data wider ones (see the sketch after the output below).
confint(lm.fit)
## 2.5 % 97.5 %
## (Intercept) -1.0575402 -0.9613061
## x 0.4462897 0.5531801
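A quick sketch of this comparison (y.less and y.more are new variables; the sd values 0.1 and 0.5 are arbitrary choices for the less and more noisy cases):
y.less = -1+0.5*x+rnorm(100, sd=0.1) # less noise than the original sd = 0.25
y.more = -1+0.5*x+rnorm(100, sd=0.5) # more noise
confint(lm(y.less~x)) # narrower intervals
confint(lm(y.more~x)) # wider intervals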
The form of the model is y = 2 + 2*x1 + 0.3*x2 + eps, with beta0 = 2, beta1 = 2 and beta2 = 0.3.
set.seed(1)
x1 = runif(100)
x2 = 0.5*x1+rnorm(100)/10
y = 2+2*x1+0.3*x2+rnorm(100)
There appears to be a strong positive linear relationship between x1 and x2 (quantified below).
plot(x1, x2)
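The strength of the collinearity can be quantified directly:
cor(x1, x2) # strong positive correlation between the two predictors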
The estimates are beta0 = 2.1305, beta1 = 1.4396 and beta2 = 1.0097. The estimate of beta0 is close to the true value, while the estimates of beta1 and beta2 are not. At the 5% level we can just barely reject H0: beta1 = 0 (p = 0.0487), but we cannot reject H0: beta2 = 0 (p = 0.3754).
lm.fit = lm(y~x1+x2)
summary(lm.fit)
##
## Call:
## lm(formula = y ~ x1 + x2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.8311 -0.7273 -0.0537 0.6338 2.3359
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.1305 0.2319 9.188 7.61e-15 ***
## x1 1.4396 0.7212 1.996 0.0487 *
## x2 1.0097 1.1337 0.891 0.3754
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.056 on 97 degrees of freedom
## Multiple R-squared: 0.2088, Adjusted R-squared: 0.1925
## F-statistic: 12.8 on 2 and 97 DF, p-value: 1.164e-05
This is a slightly better fit than the model in (c), based on the slightly lower RSE and higher adjusted R-squared. The estimate of the slope (1.9759) is close to the true beta1 = 2. The p-value is close to 0, so we can reject H0: beta1 = 0.
lm.fit2 = lm(y~x1)
summary(lm.fit2)
##
## Call:
## lm(formula = y ~ x1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.89495 -0.66874 -0.07785 0.59221 2.45560
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.1124 0.2307 9.155 8.27e-15 ***
## x1 1.9759 0.3963 4.986 2.66e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.055 on 98 degrees of freedom
## Multiple R-squared: 0.2024, Adjusted R-squared: 0.1942
## F-statistic: 24.86 on 1 and 98 DF, p-value: 2.661e-06
This is the worst fit of the three. The coefficient estimate for x2 (2.8996) is far from the true beta2 = 0.3, because x2 is picking up the effect of x1 through their correlation. The p-value is close to 0, so we can still reject H0: beta2 = 0.
lm.fit3 = lm(y~x2)
summary(lm.fit3)
##
## Call:
## lm(formula = y ~ x2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.62687 -0.75156 -0.03598 0.72383 2.44890
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.3899 0.1949 12.26 < 2e-16 ***
## x2 2.8996 0.6330 4.58 1.37e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.072 on 98 degrees of freedom
## Multiple R-squared: 0.1763, Adjusted R-squared: 0.1679
## F-statistic: 20.98 on 1 and 98 DF, p-value: 1.366e-05
If we use only one of x1 and x2 to fit the model, each appears to have a statistically significant relationship with y. Because they are collinear, it is hard to separate their individual effects when predicting y using both of them (see the VIF check below).
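A standard collinearity diagnostic is the variance inflation factor; a sketch using the car package (assuming it is installed):
library(car) # provides vif()
vif(lm.fit)  # VIFs well above 1 indicate collinearity between x1 and x2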
For models 1 (y ~ x1 + x2) and 3 (y ~ x2), the newly added point is a high-leverage point. For model 2 (y ~ x1), it is an outlier.
x1 = c(x1, 0.1)
x2 = c(x2, 0.8)
y = c(y, 6)
lm.fit4 = lm(y~x1+x2)
summary(lm.fit4)
##
## Call:
## lm(formula = y ~ x1 + x2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.73348 -0.69318 -0.05263 0.66385 2.30619
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.2267 0.2314 9.624 7.91e-16 ***
## x1 0.5394 0.5922 0.911 0.36458
## x2 2.5146 0.8977 2.801 0.00614 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.075 on 98 degrees of freedom
## Multiple R-squared: 0.2188, Adjusted R-squared: 0.2029
## F-statistic: 13.72 on 2 and 98 DF, p-value: 5.564e-06
par(mfrow=c(2,2))
plot(lm.fit4)
lm.fit5 = lm(y~x1)
summary(lm.fit5)
##
## Call:
## lm(formula = y ~ x1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.8897 -0.6556 -0.0909 0.5682 3.5665
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.2569 0.2390 9.445 1.78e-15 ***
## x1 1.7657 0.4124 4.282 4.29e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.111 on 99 degrees of freedom
## Multiple R-squared: 0.1562, Adjusted R-squared: 0.1477
## F-statistic: 18.33 on 1 and 99 DF, p-value: 4.295e-05
par(mfrow=c(2,2))
plot(lm.fit5)
lm.fit6 = lm(y~x2)
summary(lm.fit6)
##
## Call:
## lm(formula = y ~ x2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.64729 -0.71021 -0.06899 0.72699 2.38074
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.3451 0.1912 12.264 < 2e-16 ***
## x2 3.1190 0.6040 5.164 1.25e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.074 on 99 degrees of freedom
## Multiple R-squared: 0.2122, Adjusted R-squared: 0.2042
## F-statistic: 26.66 on 1 and 99 DF, p-value: 1.253e-06
par(mfrow=c(2,2))
plot(lm.fit6)
In the simple regressions below, all predictors have a statistically significant association with the response except chas.
library(MASS)
Boston$chas <- as.factor(Boston$chas)
attach(Boston)
lm.zn = lm(crim~zn, data=Boston)
summary(lm.zn) # yes
##
## Call:
## lm(formula = crim ~ zn, data = Boston)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.429 -4.222 -2.620 1.250 84.523
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.45369 0.41722 10.675 < 2e-16 ***
## zn -0.07393 0.01609 -4.594 5.51e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.435 on 504 degrees of freedom
## Multiple R-squared: 0.04019, Adjusted R-squared: 0.03828
## F-statistic: 21.1 on 1 and 504 DF, p-value: 5.506e-06
lm.indus = lm(crim~indus, data=Boston)
summary(lm.indus) # yes
##
## Call:
## lm(formula = crim ~ indus, data = Boston)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11.972 -2.698 -0.736 0.712 81.813
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.06374 0.66723 -3.093 0.00209 **
## indus 0.50978 0.05102 9.991 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.866 on 504 degrees of freedom
## Multiple R-squared: 0.1653, Adjusted R-squared: 0.1637
## F-statistic: 99.82 on 1 and 504 DF, p-value: < 2.2e-16
lm.chas = lm(crim~chas, data=Boston)
summary(lm.chas) # no
##
## Call:
## lm(formula = crim ~ chas, data = Boston)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.738 -3.661 -3.435 0.018 85.232
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.7444 0.3961 9.453 <2e-16 ***
## chas1 -1.8928 1.5061 -1.257 0.209
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.597 on 504 degrees of freedom
## Multiple R-squared: 0.003124, Adjusted R-squared: 0.001146
## F-statistic: 1.579 on 1 and 504 DF, p-value: 0.2094
lm.nox = lm(crim~nox, data=Boston)
summary(lm.nox) # yes
##
## Call:
## lm(formula = crim ~ nox, data = Boston)
##
## Residuals:
## Min 1Q Median 3Q Max
## -12.371 -2.738 -0.974 0.559 81.728
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -13.720 1.699 -8.073 5.08e-15 ***
## nox 31.249 2.999 10.419 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.81 on 504 degrees of freedom
## Multiple R-squared: 0.1772, Adjusted R-squared: 0.1756
## F-statistic: 108.6 on 1 and 504 DF, p-value: < 2.2e-16
lm.rm = lm(crim~rm, data=Boston)
summary(lm.rm) # yes
##
## Call:
## lm(formula = crim ~ rm, data = Boston)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.604 -3.952 -2.654 0.989 87.197
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 20.482 3.365 6.088 2.27e-09 ***
## rm -2.684 0.532 -5.045 6.35e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.401 on 504 degrees of freedom
## Multiple R-squared: 0.04807, Adjusted R-squared: 0.04618
## F-statistic: 25.45 on 1 and 504 DF, p-value: 6.347e-07
lm.age = lm(crim~age, data=Boston)
summary(lm.age) # yes
##
## Call:
## lm(formula = crim ~ age, data = Boston)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.789 -4.257 -1.230 1.527 82.849
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.77791 0.94398 -4.002 7.22e-05 ***
## age 0.10779 0.01274 8.463 2.85e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.057 on 504 degrees of freedom
## Multiple R-squared: 0.1244, Adjusted R-squared: 0.1227
## F-statistic: 71.62 on 1 and 504 DF, p-value: 2.855e-16
lm.dis = lm(crim~dis, data=Boston)
summary(lm.dis) # yes
##
## Call:
## lm(formula = crim ~ dis, data = Boston)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.708 -4.134 -1.527 1.516 81.674
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.4993 0.7304 13.006 <2e-16 ***
## dis -1.5509 0.1683 -9.213 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.965 on 504 degrees of freedom
## Multiple R-squared: 0.1441, Adjusted R-squared: 0.1425
## F-statistic: 84.89 on 1 and 504 DF, p-value: < 2.2e-16
lm.rad = lm(crim~rad, data=Boston)
summary(lm.rad) # yes
##
## Call:
## lm(formula = crim ~ rad, data = Boston)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.164 -1.381 -0.141 0.660 76.433
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.28716 0.44348 -5.157 3.61e-07 ***
## rad 0.61791 0.03433 17.998 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.718 on 504 degrees of freedom
## Multiple R-squared: 0.3913, Adjusted R-squared: 0.39
## F-statistic: 323.9 on 1 and 504 DF, p-value: < 2.2e-16
lm.tax = lm(crim~tax, data=Boston)
summary(lm.tax) # yes
##
## Call:
## lm(formula = crim ~ tax, data = Boston)
##
## Residuals:
## Min 1Q Median 3Q Max
## -12.513 -2.738 -0.194 1.065 77.696
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -8.528369 0.815809 -10.45 <2e-16 ***
## tax 0.029742 0.001847 16.10 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.997 on 504 degrees of freedom
## Multiple R-squared: 0.3396, Adjusted R-squared: 0.3383
## F-statistic: 259.2 on 1 and 504 DF, p-value: < 2.2e-16
lm.ptratio = lm(crim~ptratio, data=Boston)
summary(lm.ptratio) # yes
##
## Call:
## lm(formula = crim ~ ptratio, data = Boston)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.654 -3.985 -1.912 1.825 83.353
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -17.6469 3.1473 -5.607 3.40e-08 ***
## ptratio 1.1520 0.1694 6.801 2.94e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.24 on 504 degrees of freedom
## Multiple R-squared: 0.08407, Adjusted R-squared: 0.08225
## F-statistic: 46.26 on 1 and 504 DF, p-value: 2.943e-11
lm.black = lm(crim~black, data=Boston)
summary(lm.black) # yes
##
## Call:
## lm(formula = crim ~ black, data = Boston)
##
## Residuals:
## Min 1Q Median 3Q Max
## -13.756 -2.299 -2.095 -1.296 86.822
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 16.553529 1.425903 11.609 <2e-16 ***
## black -0.036280 0.003873 -9.367 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.946 on 504 degrees of freedom
## Multiple R-squared: 0.1483, Adjusted R-squared: 0.1466
## F-statistic: 87.74 on 1 and 504 DF, p-value: < 2.2e-16
lm.lstat = lm(crim~lstat, data=Boston)
summary(lm.lstat) # yes
##
## Call:
## lm(formula = crim ~ lstat, data = Boston)
##
## Residuals:
## Min 1Q Median 3Q Max
## -13.925 -2.822 -0.664 1.079 82.862
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.33054 0.69376 -4.801 2.09e-06 ***
## lstat 0.54880 0.04776 11.491 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.664 on 504 degrees of freedom
## Multiple R-squared: 0.2076, Adjusted R-squared: 0.206
## F-statistic: 132 on 1 and 504 DF, p-value: < 2.2e-16
lm.medv = lm(crim~medv, data=Boston)
summary(lm.medv) # yes
##
## Call:
## lm(formula = crim ~ medv, data = Boston)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.071 -4.022 -2.343 1.298 80.957
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 11.79654 0.93419 12.63 <2e-16 ***
## medv -0.36316 0.03839 -9.46 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.934 on 504 degrees of freedom
## Multiple R-squared: 0.1508, Adjusted R-squared: 0.1491
## F-statistic: 89.49 on 1 and 504 DF, p-value: < 2.2e-16
In the multiple regression, at the 5% level we can reject the null hypothesis H0: beta_j = 0 for zn, dis, rad, black, and medv; nox (p = 0.051) and lstat (p = 0.096) fall just short of significance.
lm.all = lm(crim~., data=Boston)
summary(lm.all)
##
## Call:
## lm(formula = crim ~ ., data = Boston)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.924 -2.120 -0.353 1.019 75.051
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 17.033228 7.234903 2.354 0.018949 *
## zn 0.044855 0.018734 2.394 0.017025 *
## indus -0.063855 0.083407 -0.766 0.444294
## chas1 -0.749134 1.180147 -0.635 0.525867
## nox -10.313535 5.275536 -1.955 0.051152 .
## rm 0.430131 0.612830 0.702 0.483089
## age 0.001452 0.017925 0.081 0.935488
## dis -0.987176 0.281817 -3.503 0.000502 ***
## rad 0.588209 0.088049 6.680 6.46e-11 ***
## tax -0.003780 0.005156 -0.733 0.463793
## ptratio -0.271081 0.186450 -1.454 0.146611
## black -0.007538 0.003673 -2.052 0.040702 *
## lstat 0.126211 0.075725 1.667 0.096208 .
## medv -0.198887 0.060516 -3.287 0.001087 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.439 on 492 degrees of freedom
## Multiple R-squared: 0.454, Adjusted R-squared: 0.4396
## F-statistic: 31.47 on 13 and 492 DF, p-value: < 2.2e-16
The univariate and multiple regression coefficients differ noticeably; most strikingly, the coefficient of nox is nearly 31 in the univariate model and about -10 in the multiple regression model.
x <- c(coef(lm.zn)[2], coef(lm.indus)[2], coef(lm.chas)[2], coef(lm.nox)[2],
coef(lm.rm)[2], coef(lm.age)[2], coef(lm.dis)[2], coef(lm.rad)[2],
coef(lm.tax)[2], coef(lm.ptratio)[2], coef(lm.black)[2],
coef(lm.lstat)[2], coef(lm.medv)[2])
y <- coef(lm.all)[2:14]
plot(x, y, xlab="Univariate regression coefficient", ylab="Multiple regression coefficient")
lm.zn.3 = lm(crim~poly(zn,3), data=Boston)
summary(lm.zn.3) # 1st, 2nd
##
## Call:
## lm(formula = crim ~ poly(zn, 3), data = Boston)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.821 -4.614 -1.294 0.473 84.130
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.6135 0.3722 9.709 < 2e-16 ***
## poly(zn, 3)1 -38.7498 8.3722 -4.628 4.7e-06 ***
## poly(zn, 3)2 23.9398 8.3722 2.859 0.00442 **
## poly(zn, 3)3 -10.0719 8.3722 -1.203 0.22954
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.372 on 502 degrees of freedom
## Multiple R-squared: 0.05824, Adjusted R-squared: 0.05261
## F-statistic: 10.35 on 3 and 502 DF, p-value: 1.281e-06
lm.indus.3 = lm(crim~poly(indus,3), data=Boston)
summary(lm.indus.3) # 1st, 2nd, 3rd
##
## Call:
## lm(formula = crim ~ poly(indus, 3), data = Boston)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.278 -2.514 0.054 0.764 79.713
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.614 0.330 10.950 < 2e-16 ***
## poly(indus, 3)1 78.591 7.423 10.587 < 2e-16 ***
## poly(indus, 3)2 -24.395 7.423 -3.286 0.00109 **
## poly(indus, 3)3 -54.130 7.423 -7.292 1.2e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.423 on 502 degrees of freedom
## Multiple R-squared: 0.2597, Adjusted R-squared: 0.2552
## F-statistic: 58.69 on 3 and 502 DF, p-value: < 2.2e-16
lm.nox.3 = lm(crim~poly(nox,3), data=Boston)
summary(lm.nox.3) # 1st, 2nd, 3rd
##
## Call:
## lm(formula = crim ~ poly(nox, 3), data = Boston)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.110 -2.068 -0.255 0.739 78.302
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.6135 0.3216 11.237 < 2e-16 ***
## poly(nox, 3)1 81.3720 7.2336 11.249 < 2e-16 ***
## poly(nox, 3)2 -28.8286 7.2336 -3.985 7.74e-05 ***
## poly(nox, 3)3 -60.3619 7.2336 -8.345 6.96e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.234 on 502 degrees of freedom
## Multiple R-squared: 0.297, Adjusted R-squared: 0.2928
## F-statistic: 70.69 on 3 and 502 DF, p-value: < 2.2e-16
lm.rm.3 = lm(crim~poly(rm,3), data=Boston)
summary(lm.rm.3) # 1st, 2nd
##
## Call:
## lm(formula = crim ~ poly(rm, 3), data = Boston)
##
## Residuals:
## Min 1Q Median 3Q Max
## -18.485 -3.468 -2.221 -0.015 87.219
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.6135 0.3703 9.758 < 2e-16 ***
## poly(rm, 3)1 -42.3794 8.3297 -5.088 5.13e-07 ***
## poly(rm, 3)2 26.5768 8.3297 3.191 0.00151 **
## poly(rm, 3)3 -5.5103 8.3297 -0.662 0.50858
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.33 on 502 degrees of freedom
## Multiple R-squared: 0.06779, Adjusted R-squared: 0.06222
## F-statistic: 12.17 on 3 and 502 DF, p-value: 1.067e-07
lm.age.3 = lm(crim~poly(age,3), data=Boston)
summary(lm.age.3) # 1st, 2nd, 3rd
##
## Call:
## lm(formula = crim ~ poly(age, 3), data = Boston)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.762 -2.673 -0.516 0.019 82.842
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.6135 0.3485 10.368 < 2e-16 ***
## poly(age, 3)1 68.1820 7.8397 8.697 < 2e-16 ***
## poly(age, 3)2 37.4845 7.8397 4.781 2.29e-06 ***
## poly(age, 3)3 21.3532 7.8397 2.724 0.00668 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.84 on 502 degrees of freedom
## Multiple R-squared: 0.1742, Adjusted R-squared: 0.1693
## F-statistic: 35.31 on 3 and 502 DF, p-value: < 2.2e-16
lm.dis.3 = lm(crim~poly(dis,3), data=Boston)
summary(lm.dis.3) # 1st, 2nd, 3rd
##
## Call:
## lm(formula = crim ~ poly(dis, 3), data = Boston)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.757 -2.588 0.031 1.267 76.378
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.6135 0.3259 11.087 < 2e-16 ***
## poly(dis, 3)1 -73.3886 7.3315 -10.010 < 2e-16 ***
## poly(dis, 3)2 56.3730 7.3315 7.689 7.87e-14 ***
## poly(dis, 3)3 -42.6219 7.3315 -5.814 1.09e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.331 on 502 degrees of freedom
## Multiple R-squared: 0.2778, Adjusted R-squared: 0.2735
## F-statistic: 64.37 on 3 and 502 DF, p-value: < 2.2e-16
lm.rad.3 = lm(crim~poly(rad,3), data=Boston)
summary(lm.rad.3) # 1st, 2nd
##
## Call:
## lm(formula = crim ~ poly(rad, 3), data = Boston)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.381 -0.412 -0.269 0.179 76.217
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.6135 0.2971 12.164 < 2e-16 ***
## poly(rad, 3)1 120.9074 6.6824 18.093 < 2e-16 ***
## poly(rad, 3)2 17.4923 6.6824 2.618 0.00912 **
## poly(rad, 3)3 4.6985 6.6824 0.703 0.48231
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.682 on 502 degrees of freedom
## Multiple R-squared: 0.4, Adjusted R-squared: 0.3965
## F-statistic: 111.6 on 3 and 502 DF, p-value: < 2.2e-16
lm.tax.3 = lm(crim~poly(tax,3), data=Boston)
summary(lm.tax.3) # 1st, 2nd
##
## Call:
## lm(formula = crim ~ poly(tax, 3), data = Boston)
##
## Residuals:
## Min 1Q Median 3Q Max
## -13.273 -1.389 0.046 0.536 76.950
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.6135 0.3047 11.860 < 2e-16 ***
## poly(tax, 3)1 112.6458 6.8537 16.436 < 2e-16 ***
## poly(tax, 3)2 32.0873 6.8537 4.682 3.67e-06 ***
## poly(tax, 3)3 -7.9968 6.8537 -1.167 0.244
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.854 on 502 degrees of freedom
## Multiple R-squared: 0.3689, Adjusted R-squared: 0.3651
## F-statistic: 97.8 on 3 and 502 DF, p-value: < 2.2e-16
lm.ptratio.3 = lm(crim~poly(ptratio,3), data=Boston)
summary(lm.ptratio.3) # 1st, 2nd, 3rd
##
## Call:
## lm(formula = crim ~ poly(ptratio, 3), data = Boston)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.833 -4.146 -1.655 1.408 82.697
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.614 0.361 10.008 < 2e-16 ***
## poly(ptratio, 3)1 56.045 8.122 6.901 1.57e-11 ***
## poly(ptratio, 3)2 24.775 8.122 3.050 0.00241 **
## poly(ptratio, 3)3 -22.280 8.122 -2.743 0.00630 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.122 on 502 degrees of freedom
## Multiple R-squared: 0.1138, Adjusted R-squared: 0.1085
## F-statistic: 21.48 on 3 and 502 DF, p-value: 4.171e-13
lm.black.3 = lm(crim~poly(black,3), data=Boston)
summary(lm.black.3) # 1st
##
## Call:
## lm(formula = crim ~ poly(black, 3), data = Boston)
##
## Residuals:
## Min 1Q Median 3Q Max
## -13.096 -2.343 -2.128 -1.439 86.790
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.6135 0.3536 10.218 <2e-16 ***
## poly(black, 3)1 -74.4312 7.9546 -9.357 <2e-16 ***
## poly(black, 3)2 5.9264 7.9546 0.745 0.457
## poly(black, 3)3 -4.8346 7.9546 -0.608 0.544
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.955 on 502 degrees of freedom
## Multiple R-squared: 0.1498, Adjusted R-squared: 0.1448
## F-statistic: 29.49 on 3 and 502 DF, p-value: < 2.2e-16
lm.lstat.3 = lm(crim~poly(lstat,3), data=Boston)
summary(lm.lstat.3) # 1st, 2nd
##
## Call:
## lm(formula = crim ~ poly(lstat, 3), data = Boston)
##
## Residuals:
## Min 1Q Median 3Q Max
## -15.234 -2.151 -0.486 0.066 83.353
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.6135 0.3392 10.654 <2e-16 ***
## poly(lstat, 3)1 88.0697 7.6294 11.543 <2e-16 ***
## poly(lstat, 3)2 15.8882 7.6294 2.082 0.0378 *
## poly(lstat, 3)3 -11.5740 7.6294 -1.517 0.1299
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.629 on 502 degrees of freedom
## Multiple R-squared: 0.2179, Adjusted R-squared: 0.2133
## F-statistic: 46.63 on 3 and 502 DF, p-value: < 2.2e-16
lm.medv.3 = lm(crim~poly(medv,3), data=Boston)
summary(lm.medv.3) # 1st, 2nd, 3rd
##
## Call:
## lm(formula = crim ~ poly(medv, 3), data = Boston)
##
## Residuals:
## Min 1Q Median 3Q Max
## -24.427 -1.976 -0.437 0.439 73.655
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.614 0.292 12.374 < 2e-16 ***
## poly(medv, 3)1 -75.058 6.569 -11.426 < 2e-16 ***
## poly(medv, 3)2 88.086 6.569 13.409 < 2e-16 ***
## poly(medv, 3)3 -48.033 6.569 -7.312 1.05e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.569 on 502 degrees of freedom
## Multiple R-squared: 0.4202, Adjusted R-squared: 0.4167
## F-statistic: 121.3 on 3 and 502 DF, p-value: < 2.2e-16
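As a formal cross-check for any single predictor, the cubic fit can be compared with the simple linear fit through a nested-model F-test; a sketch for medv, reusing the fits above:
anova(lm.medv, lm.medv.3) # a small p-value favors the cubic model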